In [8]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

data_directory = 'Données/'  # Adapter selon votre environnement

sensor_columns = ['AccelX', 'AccelY', 'AccelZ', 'GyroX', 'GyroY', 'GyroZ']

def low_pass_filter(data, alpha=0.1):
    filtered = [data[0]]
    for i in range(1, len(data)):
        filtered.append(alpha * data[i] + (1 - alpha) * filtered[i-1])
    return filtered

def compute_features(df, cols):
    features = []
    for col in cols:
        col_data = df[col].values
        # Application du filtre passe-bas sur la colonne
        col_data_filtered = low_pass_filter(col_data)
        col_data_filtered = np.array(col_data_filtered)

        mean_val = col_data_filtered.mean()
        std_val = col_data_filtered.std()
        min_val = col_data_filtered.min()
        max_val = col_data_filtered.max()
        median_val = np.median(col_data_filtered)
        iqr_val = np.percentile(col_data_filtered, 75) - np.percentile(col_data_filtered, 25)
        skew_val = pd.Series(col_data_filtered).skew()
        kurt_val = pd.Series(col_data_filtered).kurt()
        energy_val = np.sum(col_data_filtered**2)
        
        fft_vals = np.fft.rfft(col_data_filtered)
        fft_ampl = np.abs(fft_vals)
        top_freqs = np.sort(fft_ampl)[-3:]
        
        features.extend([mean_val, std_val, min_val, max_val, median_val, iqr_val,
                         skew_val, kurt_val, energy_val] + list(top_freqs))
    return features

# Vérification du répertoire
if not os.path.exists(data_directory):
    print(f"Le répertoire {data_directory} n'existe pas. Vérifiez le chemin.")
    exit()

files = [f for f in os.listdir(data_directory) if f.endswith('.txt')]
if len(files) == 0:
    print("Aucun fichier .txt trouvé dans le répertoire.")
    exit()

all_data = []
all_labels = []

for filename in files:
    label = os.path.splitext(filename)[0]
    file_path = os.path.join(data_directory, filename)
    
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    
    data_lines = []
    recording = False
    for line in lines:
        line = line.strip()
        if line == 'Recording started...':
            recording = True
            continue
        elif line == 'Recording stopped.':
            recording = False
            continue
        if recording:
            data_lines.append(line)
    
    file_data = []
    for data_line in data_lines:
        try:
            data_values = list(map(float, data_line.split(',')))
            if len(data_values) == 7:
                file_data.append(data_values)
        except ValueError:
            continue
    
    if len(file_data) > 0:
        df = pd.DataFrame(file_data, columns=['Time_ms'] + sensor_columns)
        df = df.drop(columns=['Time_ms'])
        
        feats = compute_features(df, sensor_columns)
        all_data.append(feats)
        all_labels.append(label)

if len(all_data) == 0:
    print("Aucune donnée extraite. Vérifiez le contenu de vos fichiers.")
    exit()

stats_names = ['mean', 'std', 'min', 'max', 'median', 'iqr', 'skew', 'kurt', 'energy', 'topFreq1', 'topFreq2', 'topFreq3']
feature_names = []
for col in sensor_columns:
    for stat_name in stats_names:
        feature_names.append(f"{col}_{stat_name}")

data_df = pd.DataFrame(all_data, columns=feature_names)
data_df['Label'] = all_labels

X = data_df.drop('Label', axis=1)
y = data_df['Label']

print("Répartition des classes :")
print(y.value_counts())

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

if len(np.unique(y_encoded)) < 2:
    print("Moins de deux classes, classification impossible.")
    exit()

X_train, X_test, y_train_enc, y_test_enc = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

if len(X_train) == 0:
    print("Aucune donnée dans le train set après le split. Pas assez de données.")
    exit()

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train_enc)

y_pred = model.predict(X_test_scaled)

present_classes = np.unique(y_test_enc)
present_class_names = label_encoder.inverse_transform(present_classes)

print("Rapport de classification :")
print(classification_report(y_test_enc, y_pred, labels=present_classes, target_names=present_class_names, zero_division=0))

print("Matrice de confusion :")
print(confusion_matrix(y_test_enc, y_pred, labels=present_classes))

joblib.dump(model, 'model_simple.joblib')
joblib.dump(scaler, 'scaler_simple.joblib')
joblib.dump(label_encoder, 'label_encoder_simple.joblib')

print("Modèle, scaler et label encoder sauvegardés avec succès.")


Répartition des classes :
Label
Mouvement_1_Horizontal          1
Mouvement_2_Horizontal_Alt      1
Mouvement_3_Bloquage            1
Mouvement_5_vertical            1
Mouvement_9_Pas_de_Mouvement    1
Name: count, dtype: int64
Rapport de classification :
                            precision    recall  f1-score   support

Mouvement_2_Horizontal_Alt       0.00      0.00      0.00       1.0

                 micro avg       0.00      0.00      0.00       1.0
                 macro avg       0.00      0.00      0.00       1.0
              weighted avg       0.00      0.00      0.00       1.0

Matrice de confusion :
[[0]]
Modèle, scaler et label encoder sauvegardés avec succès.


In [11]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

data_directory = 'Données/'  # Adapter selon votre environnement

sensor_columns = ['AccelX', 'AccelY', 'AccelZ', 'GyroX', 'GyroY', 'GyroZ']

def low_pass_filter(data, alpha=0.1):
    filtered = [data[0]]
    for i in range(1, len(data)):
        filtered.append(alpha * data[i] + (1 - alpha) * filtered[i-1])
    return filtered

def compute_sensor_features(df, cols):
    # Calcule les features sur les données de capteurs
    features = []
    for col in cols:
        col_data = df[col].values
        col_data_filtered = np.array(low_pass_filter(col_data))
        
        mean_val = col_data_filtered.mean()
        std_val = col_data_filtered.std()
        min_val = col_data_filtered.min()
        max_val = col_data_filtered.max()
        median_val = np.median(col_data_filtered)
        iqr_val = np.percentile(col_data_filtered, 75) - np.percentile(col_data_filtered, 25)
        skew_val = pd.Series(col_data_filtered).skew()
        kurt_val = pd.Series(col_data_filtered).kurt()
        energy_val = np.sum(col_data_filtered**2)
        
        fft_vals = np.fft.rfft(col_data_filtered)
        fft_ampl = np.abs(fft_vals)
        top_freqs = np.sort(fft_ampl)[-3:]
        
        stats_check = [mean_val, std_val, min_val, max_val, median_val, iqr_val,
                       skew_val, kurt_val, energy_val] + list(top_freqs)
        if any(pd.isna(s) for s in stats_check):
            print(f"Attention : NaN dans les features de la colonne {col}.")
        if any(np.isinf(s) for s in stats_check):
            print(f"Attention : Inf dans les features de la colonne {col}.")
        
        features.extend(stats_check)
    return features

def compute_time_features(time_data):
    # time_data est la colonne de timestamps (par ex. 4195, 4218, etc.)
    duration = time_data.max() - time_data.min()
    intervals = np.diff(time_data)
    mean_interval = intervals.mean() if len(intervals) > 0 else 0
    std_interval = intervals.std() if len(intervals) > 0 else 0
    return [duration, mean_interval, std_interval]

if not os.path.exists(data_directory):
    print(f"Le répertoire {data_directory} n'existe pas. Vérifiez le chemin.")
    exit()

files = [f for f in os.listdir(data_directory) if f.endswith('.txt')]
if len(files) == 0:
    print("Aucun fichier .txt trouvé dans le répertoire.")
    exit()

# Noms des features calculées sur les capteurs
stats_names = ['mean', 'std', 'min', 'max', 'median', 'iqr', 'skew', 'kurt', 'energy', 'topFreq1', 'topFreq2', 'topFreq3']
feature_names = []
for col in sensor_columns:
    for stat_name in stats_names:
        feature_names.append(f"{col}_{stat_name}")

# Ajout des features temporelles
time_feature_names = ['Time_duration', 'Time_mean_interval', 'Time_std_interval']
all_feature_names = time_feature_names + feature_names

all_data = []
all_labels = []
file_count = 0
block_count = 0

for filename in files:
    label = os.path.splitext(filename)[0]
    file_path = os.path.join(data_directory, filename)
    
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    
    recording_blocks = []
    current_block = []
    recording = False
    for line in lines:
        line = line.strip()
        if line == 'Recording started...':
            recording = True
            current_block = []
            continue
        elif line == 'Recording stopped.':
            recording = False
            if len(current_block) > 0:
                recording_blocks.append(current_block)
            current_block = []
            continue
        if recording:
            current_block.append(line)
    
    print(f"\nFichier : {filename}")
    print(f"Label associé : {label}")
    print(f"Nombre de blocs 'Recording started.../stopped.' trouvés : {len(recording_blocks)}")

    for i, block in enumerate(recording_blocks):
        file_data = []
        for idx, data_line in enumerate(block):
            try:
                data_values = list(map(float, data_line.split(',')))
                if len(data_values) == 7:
                    # data_values[0] = Time_ms, ensuite AccelX, AccelY, AccelZ, GyroX, GyroY, GyroZ
                    file_data.append(data_values)
                else:
                    print(f"Ligne ignorée dans le bloc {i} du fichier {filename} car elle n'a pas 7 valeurs.")
            except ValueError:
                print(f"Ligne non convertible en float dans le bloc {i} du fichier {filename}. Ignorée.")

        if len(file_data) == 0:
            print(f"Bloc {i} du fichier {filename} est vide après filtrage.")
            continue

        df = pd.DataFrame(file_data, columns=['Time_ms'] + sensor_columns)
        
        # Calcul des features temporelles
        time_data = df['Time_ms'].values
        time_feats = compute_time_features(time_data)

        # Calcul des features capteurs
        df_sensors = df.drop(columns=['Time_ms'])
        sensor_feats = compute_sensor_features(df_sensors, sensor_columns)

        feats = time_feats + sensor_feats

        if file_count == 0 and block_count == 0:
            print("Exemple de données brutes du premier bloc du premier fichier :")
            print(df.head())
            print("Exemple de features extraites pour le premier bloc du premier fichier :")
            for fname, val in zip(all_feature_names, feats):
                print(f"{fname}: {val}")

        all_data.append(feats)
        all_labels.append(label)
        block_count += 1
    file_count += 1

if len(all_data) == 0:
    print("Aucune donnée extraite. Vérifiez le contenu de vos fichiers.")
    exit()

data_df = pd.DataFrame(all_data, columns=all_feature_names)
data_df['Label'] = all_labels

X = data_df.drop('Label', axis=1)
y = data_df['Label']

print("\nRépartition des classes :")
class_counts = y.value_counts()
print(class_counts)

if len(class_counts) < 2:
    print("Moins de deux classes, classification impossible.")
    exit()

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X_train, X_test, y_train_enc, y_test_enc = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

if len(X_train) == 0:
    print("Aucune donnée dans le train set après le split. Pas assez de données.")
    exit()

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train_enc)

y_pred = model.predict(X_test_scaled)

present_classes = np.unique(y_test_enc)
present_class_names = label_encoder.inverse_transform(present_classes)

print("\nRapport de classification :")
print(classification_report(y_test_enc, y_pred, labels=present_classes, target_names=present_class_names, zero_division=0))

print("Matrice de confusion :")
print(confusion_matrix(y_test_enc, y_pred, labels=present_classes))

joblib.dump(model, 'model_simple.joblib')
joblib.dump(scaler, 'scaler_simple.joblib')
joblib.dump(label_encoder, 'label_encoder_simple.joblib')

print("\nVérification terminée. Les features (y compris temporelles) ont été calculées, le modèle entraîné et sauvegardé.")
print(f"Nombre total de fichiers traités : {file_count}")
print(f"Nombre total d'échantillons (blocs) : {block_count}")
for c in class_counts.index:
    print(f"Classe '{c}': {class_counts[c]} échantillons.")



Fichier : Mouvement_1_Horizontal.txt
Label associé : Mouvement_1_Horizontal
Nombre de blocs 'Recording started.../stopped.' trouvés : 201
Exemple de données brutes du premier bloc du premier fichier :
   Time_ms     AccelX     AccelY    AccelZ     GyroX     GyroY     GyroZ
0   1945.0   8.734048   2.899378 -5.530606 -0.464577  0.089798 -1.026946
1   1968.0   9.397242   6.004658 -3.962404 -0.414083  0.120308 -0.766613
2   1991.0   9.952697   6.002264 -3.016694 -0.131499  0.045299 -0.544649
3   2014.0  10.338163   9.035717 -1.795651 -0.123106 -0.154015 -0.296839
4   2037.0   9.622296  12.344504 -3.900155 -0.344536 -0.133764  0.227825
Exemple de features extraites pour le premier bloc du premier fichier :
Time_duration: 529.0
Time_mean_interval: 23.0
Time_std_interval: 0.0
AccelX_mean: 4.263682710131825
AccelX_std: 5.337957930207203
AccelX_min: -7.723982893684861
AccelX_max: 9.114300561600002
AccelX_median: 6.597106113789129
AccelX_iqr: 7.495223827969312
AccelX_skew: -1.0092765709139946
A